import re
import urllib.request

# URL template for the per-year calendar text files; the %d placeholder is
# filled with the year by downloadData(), which decodes the response as Big5.
URL = 'https://www.hko.gov.hk/tc/gts/time/calendar/text/files/T%dc.txt'


def downloadData():
    """Download the calendar text file for each year 1921-2030 into ``data/``.

    For every year the ``URL`` template is filled in, the URL is printed, the
    response body is decoded as Big5 and written to ``data/<year>.txt``.

    The files are written with the platform's default text encoding, which is
    also what ``loadData`` uses when reading them back.

    Raises:
        urllib.error.URLError: if a download fails.
        UnicodeDecodeError: if a response is not valid Big5.
    """
    # Local import so this function does not rely on the file-level imports.
    import os

    # Make sure the target directory exists instead of failing on first write.
    os.makedirs('data', exist_ok=True)

    for i in range(1921, 2031):
        url = URL % i
        print(url)
        # Close the HTTP response deterministically once the body is read.
        with urllib.request.urlopen(url) as response:
            result = response.read().decode('big5')
        with open('data/' + str(i) + '.txt', 'w') as f:
            f.write(result)


def loadData(year):
    """Collect the second column of the date rows for ``year`` and ``year + 1``.

    Reads ``data/<year>.txt`` and then ``data/<year + 1>.txt``.  In each file
    the first three lines are skipped.  Every remaining line that starts with
    a date of the form ``YYYY年M月D日`` contributes its second
    whitespace-separated field (the label that ``compute`` inspects, such as
    '正月' or '三十').

    Args:
        year: the first of the two consecutive years to load.

    Returns:
        A list of strings: the labels from ``year`` followed by the labels
        from ``year + 1``, in file order.

    Raises:
        FileNotFoundError: if either data file is missing.
        IndexError: if a matching line has fewer than two fields.
    """
    # Raw string: '\d' in a normal string literal is an invalid escape.
    re_cal = re.compile(r'(\d{4})年(\d{1,2})月(\d{1,2})日')
    labels = []
    for y in (year, year + 1):
        with open("data/" + str(y) + ".txt", "r") as f:
            lines = f.readlines()
        # The first three lines are skipped before looking for date rows.
        for line in lines[3:]:
            if re_cal.match(line):
                # split() with no argument already ignores surrounding
                # whitespace, so no explicit strip() is needed.
                labels.append(line.split()[1])
    return labels


def compute(year):
    """Encode the month lengths of the lunar year beginning in ``year``.

    The labels returned by ``loadData(year)`` are trimmed to the span that
    starts at the first '正月' and ends just before the next '正月'.  That
    span is consumed one month at a time: a month whose 30th label is '三十'
    contributes a '1' bit and 30 labels, any other month contributes a '0'
    bit and 29 labels.  The first month ends up as the most significant bit.

    If a month's first label is one of the leap-month names, its position in
    the leap-name list is recorded and placed in front of the month bits.

    Args:
        year: the Gregorian year in which the lunar year's '正月' falls.

    Returns:
        ``int(month_bits, 2)`` when no leap month was seen, otherwise
        ``int(bin(position) + "000" + month_bits, 2)``.

    Raises:
        ValueError: if '正月' is not found twice in the loaded labels.
        IndexError: if the remaining labels for a month number fewer than 29.
    """
    # Index 0 is a placeholder so that a leap month's list position equals
    # its month number.
    # NOTE(review): confirm the data files spell the leap twelfth month as
    # '閏腊月'; otherwise that entry can never match.
    leap_names = ["*", "閏正月", "閏二月", "閏三月", "閏四月", "閏五月", "閏六月", "閏七月", "閏八月", "閏九月", "閏十月", "閏十一月", "閏腊月"]

    data = loadData(year)
    data = data[data.index('正月'):]
    # Start the second search past index 0, which is the first '正月' itself.
    data = data[:data.index('正月', 2)]

    month_bits = []
    leap_prefix = ""
    while data:
        # Check for a leap month before the length tests so the final month
        # is covered too (previously a 29-day last month skipped this check).
        if data[0] in leap_names:
            leap_prefix = bin(leap_names.index(data[0]))

        if len(data) == 29:
            # Exactly one 29-day month remains.
            month_bits.append('0')
            break
        if data[29] == '三十':
            month_bits.append('1')
            data = data[30:]
        else:
            month_bits.append('0')
            data = data[29:]

    bits = "".join(month_bits)
    if leap_prefix:
        # int(..., 2) accepts the '0b' prefix that bin() produces.
        return int(leap_prefix + "000" + bits, 2)
    return int(bits, 2)


def test_getData():
    """Print the list of ``compute`` results for the years 1921 through 2029.

    The data files must already exist under ``data/``; call ``downloadData()``
    beforehand if they do not.
    """
    encoded_years = [compute(y) for y in range(1921, 2030)]
    print(encoded_years)
